#############################################
## PROGRAM NAME: 205_desc_fig.R            ##
## AUTHOR: MATT MLECZKO                    ##
## INPUTS:                                 ##
##         005_cc_out.Rda                  ##
##         005_msas_keep.Rda               ##
##         201_af_msa_fin.Rda              ##
##         201_af_muni_fin.Rda             ##
##                                         ##
## OUTPUTS:                                ##
##          Figure 3                       ##
##                                         ## 
## PURPOSE: Create Figure 3 and            ##
##          version with non-imputed data  ##
##                                         ##
## LIST OF UPDATES:                        ##
#############################################

#log <- file(# USER DEFINED PATH AND FILE NAME HERE #)
#sink(log, append=TRUE)
#sink(log, append=TRUE, type="message")

set.seed(08542)

## load libraries ##

library(tidyverse)
library(foreign)
library(stringr)
library(tm)
library(gdata)
library(gsubfn)
library(haven)
library(readxl)
library(mice)
library(Rcpp)
library(basecamb)

## define paths ##
pr_path <- # USER DEFINED PATH HERE #

## set working directory ##
setwd(pr_path)

`%notin%` <- Negate(`%in%`)

## create a merge function that creates merge frequency as in Stata ##
## from user rwbuie at stackoverflow: https://stackoverflow.com/questions/30358401/is-there-a-way-to-create-statas-merge-indicator-variable-with-rs-merge ##
stata.merge <- function(x,y, name){
  x$df1 <- 1
  y$df2 <- 2
  df <- merge(x,y, by = name, all = TRUE)
  df$merge.variable <- rowSums(df[,c("df1", "df2")], na.rm=TRUE)
  df$df1 <- NULL
  df$df2<- NULL
  df
  #print(table(df$merge.variable))
  
  ## return the merged dataframe
  return(df)
}

## load analytic files ##

load("005_cc_out.Rda")
load("005_msas_keep.Rda")
load("201_af_msa_fin.Rda")
load("201_af_muni_fin.Rda")

## how extensive does the imputation need to be? ##

summary(af.muni.fin$zri_2006)
sd(af.muni.fin$zri_2006, na.rm=T)
summary(af.muni.fin$sindex1_2006)
summary(af.muni.fin$sindex2_2006)
summary(af.muni.fin$sindex3_2006)
summary(af.muni.fin$sindex4_2006)
summary(af.muni.fin$sindex5_2006)
summary(af.muni.fin$sindex6_2006)
summary(af.muni.fin$sindex7_2006)

sapply(af.muni.fin, function(x) sum(is.na(x)))

## prep data for mi ##
af.muni.rd <- af.muni.fin %>%
  select(GEOID,
           pop_total_2009,
           pop_density_2009,
           per_18t34_2009,
           per_35t64_2009,
           per_65over_2009,
           median_age_2009,
           per_asian_2009,
           per_black_2009,
           per_latino_2009,
           per_white_2009,
           entropy_er_2009,
           per_child_2009,
           per_nomove_2009,
           per_immigrant_2009,
           hownrt_2009,
           median_gross_rent_2009,
           median_prop_value_2009,
           vacant_rt_2009,
           median_yr_str_2009,
           median_hhld_inc_2009,
           hhld_pov_rt_2009,
           unem_rt_2009,
           gini_2009,
           pop_density_2009,
           sindex1_2006,
           sindex2_2006,
           sindex3_2006,
           sindex4_2006,
           sindex5_2006,
           sindex6_2006,
           sindex7_2006,
           sindex1_2022,
           sindex2_2022,
           sindex3_2022,
           sindex4_2022,
           sindex5_2022,
           sindex6_2022,
           sindex7_2022,
           zri_2006,
           zri_st_2006,
           zri_2022,
           zri_st_2022)
  
test <- af.muni.rd %>%
  select(-GEOID)
  
test.cor <- cor(test)
  
sapply(af.muni.rd, function(x) sum(is.na(x)))
  
## multiple imputation (1) ## 
## source: https://library.virginia.edu/data/articles/getting-started-with-multiple-imputation-in-r ##

imp <- mice(af.muni.rd, maxit=0)
  
## extract predictorMatrix and methods of imputation ##
  
predM <- imp$predictorMatrix
meth <- imp$method
det(predM)
  
## leave these out of the predictor matrix ##
predM[, c("GEOID")] <- 0
predM[, c("sindex3_2006")] <- 0
predM[, c("sindex6_2006")] <- 0
predM[, c("sindex7_2006")] <- 0
  
## check determinant ##
det(predM)
  
## check the method ##
meth
  
## run the imputation ##
  
af.muni.mi <- mice(af.muni.rd, m = 10, 
                    predictorMatrix = predM, 
                    method = "cart", print =  FALSE)

## pool all the imputed data sets, along with original ## 

af.muni.complete <- complete(af.muni.mi, "long", include = T)

summary(af.muni.complete)
sapply(af.muni.complete, function(x) sum(is.na(x)))


## PCA ## 
  
mi.pca <- function(impu){
    
  mi.pca.data <- af.muni.complete %>%
    filter(.imp == impu) %>%
    select(sindex1_2006, 
             sindex2_2006, 
             sindex3_2006, 
             sindex4_2006, 
             sindex5_2006, 
             sindex6_2006, 
             sindex7_2006)
    
  summary(mi.pca.data)
    
  mi.pca.res <- prcomp(mi.pca.data, center = T, scale=T)
  loadings <- mi.pca.res$rotation
    
  pca.data <- af.muni.complete %>%
    filter(.imp == impu) %>%
    mutate(zri_2006 = sindex1_2006*loadings[1,1] + 
               sindex2_2006*loadings[2,1] + 
               sindex3_2006*loadings[3,1] + 
               sindex4_2006*loadings[4,1] + 
               sindex5_2006*loadings[5,1] + 
               sindex6_2006*loadings[6,1] + 
               sindex7_2006*loadings[7,1])
    
  pca.data$zri_st_2006 <- (pca.data$zri_2006 - mean(pca.data$zri_2006, na.rm=T))/sd(pca.data$zri_2006,na.rm=T)
    
  return(pca.data)
    
}
  
mi.pca.out <- lapply(c(1,2,3,4,5,6,7,8,9,10), mi.pca)
  
## extract the pca output ## 
  
af.muni.out <- bind_rows(mi.pca.out)
  
summary(af.muni.out)
  
## get original data ##
  
af.muni.og <- af.muni.complete %>%
  filter(.imp == 0)
  
## bring on rest of covariates and outcomes ##
  
af.muni.m2 <- af.muni.fin %>%
  select(GEOID,
           pop_total_2022,
           per_18t34_2022,
           per_35t64_2022,
           per_65over_2022,
           median_age_2022,
           per_asian_2022,
           per_black_2022,
           per_latino_2022,
           per_white_2022,
           entropy_er_2022,
           per_child_2022,
           per_nomove_2022,
           per_immigrant_2022,
           hownrt_2022,
           median_gross_rent_2022,
           median_prop_value_2022,
           vacant_rt_2022,
           median_yr_str_2022,
           median_hhld_inc_2022,
           hhld_pov_rt_2022,
           unem_rt_2022,
           gini_2022,
           pop_density_2022)
  
## merge ##
af.muni.reg.m <- stata.merge(af.muni.out,
                             af.muni.m2,
                             "GEOID")
  
## check merge ## 
table(af.muni.reg.m$merge.variable, useNA = "ifany")
  
## imputed data ##
af.muni.imp <- af.muni.reg.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)
  
## now, original data ##
  
af.muni.m3 <- af.muni.fin %>%
  select(GEOID,
          pop_total_2022,
         per_18t34_2022,
         per_35t64_2022,
         per_65over_2022,
           median_age_2022,
           per_asian_2022,
           per_black_2022,
           per_latino_2022,
           per_white_2022,
           entropy_er_2022,
           per_child_2022,
           per_nomove_2022,
           per_immigrant_2022,
           hownrt_2022,
           median_gross_rent_2022,
           median_prop_value_2022,
           vacant_rt_2022,
           median_yr_str_2022,
           median_hhld_inc_2022,
           hhld_pov_rt_2022,
           unem_rt_2022,
           gini_2022,
           pop_density_2022)
  
## merge ##
af.muni.og.fin.m <- stata.merge(af.muni.og,
                                  af.muni.m3,
                                  "GEOID")
  
## check merge ## 
table(af.muni.og.fin.m$merge.variable, useNA = "ifany")
  
## final reg file ##
af.muni.og.fin <- af.muni.og.fin.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable)
  
## combine data ##
  
af.muni.cb <- rbind(af.muni.og.fin,
                    af.muni.imp)
  
af.muni.cb.fin <- af.muni.cb[order(af.muni.cb$.imp, af.muni.cb$.id),]
  
## final vars ##
  
af.muni.final <- af.muni.cb.fin %>%
  mutate(zri.i = case_when(zri_st_2022 > zri_st_2006 + 0.5 ~ 1,
                           zri_st_2022 <= zri_st_2006 + 0.5 ~ 0),
           zri.d = case_when(zri_st_2022 < zri_st_2006 - 0.5 ~ 1,
                             zri_st_2022 >= zri_st_2006 - 0.5 ~ 0),
           zri.s = case_when(abs(zri_st_2022 - zri_st_2006) <= 0.5 ~ 1,
                             abs(zri_st_2022 - zri_st_2006) > 0.5 ~ 0),
           zri.id = abs(zri.i - zri.d),
           sindex1.change = sindex1_2022 - sindex1_2006,
           sindex2.change = sindex2_2022 - sindex2_2006,
           sindex3.change = sindex3_2022 - sindex3_2006,
           sindex4.change = sindex4_2022 - sindex4_2006,
           sindex5.change = sindex5_2022 - sindex5_2006,
           sindex6.change = sindex6_2022 - sindex6_2006,
           sindex7.change = sindex7_2022 - sindex7_2006,
           sindex1.i = case_when(sindex1_2022 > sindex1_2006 ~ 1,
                                 sindex1_2022 <= sindex1_2006 ~ 0),
           sindex2.i = case_when(sindex2_2022 > sindex2_2006 ~ 1,
                                 sindex2_2022 <= sindex2_2006 ~ 0),
           sindex3.i = case_when(sindex3_2022 > sindex3_2006 ~ 1,
                                 sindex3_2022 <= sindex3_2006 ~ 0),
           sindex4.i = case_when(sindex4_2022 > sindex4_2006 ~ 1,
                                 sindex4_2022 <= sindex4_2006 ~ 0),
           sindex5.i = case_when(sindex5_2022 > sindex5_2006 ~ 1,
                                 sindex5_2022 <= sindex5_2006 ~ 0),
           sindex6.i = case_when(sindex6_2022 > sindex6_2006 ~ 1,
                                 sindex6_2022 <= sindex6_2006 ~ 0),
           sindex7.i = case_when(sindex7_2022 > sindex7_2006 ~ 1,
                                 sindex7_2022 <= sindex7_2006 ~ 0),
           sindex1.d = case_when(sindex1_2022 < sindex1_2006 ~ 1,
                                 sindex1_2022 >= sindex1_2006 ~ 0),
           sindex2.d = case_when(sindex2_2022 < sindex2_2006 ~ 1,
                                 sindex2_2022 >= sindex2_2006 ~ 0),
           sindex3.d = case_when(sindex3_2022 < sindex3_2006 ~ 1,
                                 sindex3_2022 >= sindex3_2006 ~ 0),
           sindex4.d = case_when(sindex4_2022 < sindex4_2006 ~ 1,
                                 sindex4_2022 >= sindex4_2006 ~ 0),
           sindex5.d = case_when(sindex5_2022 < sindex5_2006 ~ 1,
                                 sindex5_2022 >= sindex5_2006 ~ 0),
           sindex6.d = case_when(sindex6_2022 < sindex6_2006 ~ 1,
                                 sindex6_2022 >= sindex6_2006 ~ 0),
           sindex7.d = case_when(sindex7_2022 < sindex7_2006 ~ 1,
                                 sindex7_2022 >= sindex7_2006 ~ 0),
           sindex1.s = case_when(sindex1_2022 == sindex1_2006 ~ 1,
                                 sindex1_2022 != sindex1_2006 ~ 0),
           sindex2.s = case_when(sindex2_2022 == sindex2_2006 ~ 1,
                                 sindex2_2022 != sindex2_2006 ~ 0),
           sindex3.s = case_when(sindex3_2022 == sindex3_2006 ~ 1,
                                 sindex3_2022 != sindex3_2006 ~ 0),
           sindex4.s = case_when(sindex4_2022 == sindex4_2006 ~ 1,
                                 sindex4_2022 != sindex4_2006 ~ 0),
           sindex5.s = case_when(sindex5_2022 == sindex5_2006 ~ 1,
                                 sindex5_2022 != sindex5_2006 ~ 0),
           sindex6.s = case_when(sindex6_2022 == sindex6_2006 ~ 1,
                                 sindex6_2022 != sindex6_2006 ~ 0),
           sindex7.s = case_when(sindex7_2022 == sindex7_2006 ~ 1,
                                 sindex7_2022 != sindex7_2006 ~ 0))
 
summary(af.muni.final$zri.i)
summary(af.muni.final$zri.d)
summary(af.muni.final$zri.s)
summary(af.muni.final$zri.id)
  
summary(af.muni.final$sindex1.change)
summary(af.muni.final$sindex2.change)
summary(af.muni.final$sindex3.change)
summary(af.muni.final$sindex4.change)
summary(af.muni.final$sindex5.change)
summary(af.muni.final$sindex6.change)
summary(af.muni.final$sindex7.change)
  
summary(af.muni.final$sindex1.i)
summary(af.muni.final$sindex1.d)
summary(af.muni.final$sindex1.s)
  
summary(af.muni.final$sindex2.i)
summary(af.muni.final$sindex2.d)
summary(af.muni.final$sindex2.s)
  
summary(af.muni.final$sindex3.i)
summary(af.muni.final$sindex3.d)
summary(af.muni.final$sindex3.s)
  
summary(af.muni.final$sindex4.i)
summary(af.muni.final$sindex4.d)
summary(af.muni.final$sindex4.s)
  
summary(af.muni.final$sindex5.i)
summary(af.muni.final$sindex5.d)
summary(af.muni.final$sindex5.s)
  
summary(af.muni.final$sindex6.i)
summary(af.muni.final$sindex6.d)
summary(af.muni.final$sindex6.s)
  
summary(af.muni.final$sindex7.i)
summary(af.muni.final$sindex7.d)
summary(af.muni.final$sindex7.s)
  
  
## keep matches ##
af.muni.mi3.in <- af.muni.final %>%
  arrange(.imp) %>%
  mutate(.id = rep(seq(1,nrow(af.muni.fin), by=1),11))
  
af.muni.mi3 <- as.mids(af.muni.mi3.in)
  
  
## append original data ##
nzlu.panel.og <- af.muni.final %>%
  filter(.imp == 0) %>%
  mutate(zri_2006 = NA,
           zri.i = NA,
           zri.d = NA,
           zri.s = NA,
           zri.id = NA,
           sindex1.change = NA,
           sindex2.change = NA,
           sindex3.change = NA,
           sindex4.change = NA,
           sindex5.change = NA,
           sindex6.change = NA,
           sindex7.change = NA,
           sindex1.i = NA,
           sindex1.d = NA,
           sindex1.s = NA,
           sindex2.i = NA,
           sindex2.d = NA,
           sindex2.s = NA,
           sindex3.i = NA,
           sindex3.d = NA,
           sindex3.s = NA,
           sindex4.i = NA,
           sindex4.d = NA,
           sindex4.s = NA,
           sindex5.i = NA,
           sindex5.d = NA,
           sindex5.s = NA,
           sindex6.i = NA,
           sindex6.d = NA,
           sindex6.s = NA,
           sindex7.i = NA,
           sindex7.d = NA,
           sindex7.s = NA)
  
nzlu.panel.cb <- rbind(nzlu.panel.og,
                       af.muni.final)
  
nzlu.panel.mi2 <- as.mids(af.muni.final)

## pooled results ## 

mi.res <- data.frame(pe = NA,
                     se = NA,
                     gr = NA,
                     direct = NA)

zri.i <- with(nzlu.panel.mi2,
              lm(zri.i ~ 1))

summary(pool(zri.i))

mi.res[1,1] <- summary(pool(zri.i))[2]
mi.res[1,2] <- summary(pool(zri.i))[3]
mi.res[1,3] <- "ZRI"
mi.res[1,4] <- "Increase"

zri.d <- with(nzlu.panel.mi2,
              lm(zri.d ~ 1))

summary(pool(zri.d))

mi.res[2,1] <- summary(pool(zri.d))[2]
mi.res[2,2] <- summary(pool(zri.d))[3]
mi.res[2,3] <- "ZRI"
mi.res[2,4] <- "Decrease"

zri.s <- with(nzlu.panel.mi2,
              lm(zri.s ~ 1))

summary(pool(zri.s))

mi.res[3,1] <- summary(pool(zri.s))[2]
mi.res[3,2] <- summary(pool(zri.s))[3]
mi.res[3,3] <- "ZRI"
mi.res[3,4] <- "No change"

zri.id <- with(nzlu.panel.mi2,
               lm(zri.id ~ 1))

summary(pool(zri.id))

sindex1.change <- with(nzlu.panel.mi2,
                       lm(sindex1.change ~ 1))

summary(pool(sindex1.change))

sindex2.change <- with(nzlu.panel.mi2,
                       lm(sindex2.change ~ 1))

summary(pool(sindex2.change))

sindex3.change <- with(nzlu.panel.mi2,
                       lm(sindex3.change ~ 1))

summary(pool(sindex3.change))

sindex4.change <- with(nzlu.panel.mi2,
                       lm(sindex4.change ~ 1))

summary(pool(sindex4.change))

sindex5.change <- with(nzlu.panel.mi2,
                       lm(sindex5.change ~ 1))

summary(pool(sindex5.change))

sindex6.change <- with(nzlu.panel.mi2,
                       lm(sindex6.change ~ 1))

summary(pool(sindex6.change))

sindex7.change <- with(nzlu.panel.mi2,
                       lm(sindex7.change ~ 1))

summary(pool(sindex7.change))


sindex1.i <- with(nzlu.panel.mi2,
                  lm(sindex1.i ~ 1))

summary(pool(sindex1.i))

mi.res[4,1] <- summary(pool(sindex1.i))[2]
mi.res[4,2] <- summary(pool(sindex1.i))[3]
mi.res[4,3] <- "EGCI"
mi.res[4,4] <- "Increase"

sindex1.d <- with(nzlu.panel.mi2,
                  lm(sindex1.d ~ 1))

summary(pool(sindex1.d))

mi.res[5,1] <- summary(pool(sindex1.d))[2]
mi.res[5,2] <- summary(pool(sindex1.d))[3]
mi.res[5,3] <- "EGCI"
mi.res[5,4] <- "Decrease"

sindex1.s <- with(nzlu.panel.mi2,
                  lm(sindex1.s ~ 1))

summary(pool(sindex1.s))

mi.res[6,1] <- summary(pool(sindex1.s))[2]
mi.res[6,2] <- summary(pool(sindex1.s))[3]
mi.res[6,3] <- "EGCI"
mi.res[6,4] <- "No change"

sindex2.i <- with(nzlu.panel.mi2,
                  lm(sindex2.i ~ 1))

summary(pool(sindex2.i))

mi.res[7,1] <- summary(pool(sindex2.i))[2]
mi.res[7,2] <- summary(pool(sindex2.i))[3]
mi.res[7,3] <- "OSRI"
mi.res[7,4] <- "Increase"

sindex2.d <- with(nzlu.panel.mi2,
                  lm(sindex2.d ~ 1))

summary(pool(sindex2.d))

mi.res[8,1] <- summary(pool(sindex2.d))[2]
mi.res[8,2] <- summary(pool(sindex2.d))[3]
mi.res[8,3] <- "OSRI"
mi.res[8,4] <- "Decrease"

sindex2.s <- with(nzlu.panel.mi2,
                  lm(sindex2.s ~ 1))

summary(pool(sindex2.s))

mi.res[9,1] <- summary(pool(sindex2.s))[2]
mi.res[9,2] <- summary(pool(sindex2.s))[3]
mi.res[9,3] <- "OSRI"
mi.res[9,4] <- "No change"

sindex3.i <- with(nzlu.panel.mi2,
                  lm(sindex3.i ~ 1))

summary(pool(sindex3.i))

mi.res[10,1] <- summary(pool(sindex3.i))[2]
mi.res[10,2] <- summary(pool(sindex3.i))[3]
mi.res[10,3] <- "MLSI"
mi.res[10,4] <- "Increase"

sindex3.d <- with(nzlu.panel.mi2,
                  lm(sindex3.d ~ 1))

summary(pool(sindex3.d))

mi.res[11,1] <- summary(pool(sindex3.d))[2]
mi.res[11,2] <- summary(pool(sindex3.d))[3]
mi.res[11,3] <- "MLSI"
mi.res[11,4] <- "Decrease"

sindex3.s <- with(nzlu.panel.mi2,
                  lm(sindex3.s ~ 1))

summary(pool(sindex3.s))

mi.res[12,1] <- summary(pool(sindex3.s))[2]
mi.res[12,2] <- summary(pool(sindex3.s))[3]
mi.res[12,3] <- "MLSI"
mi.res[12,4] <- "No change"

sindex4.i <- with(nzlu.panel.mi2,
                  lm(sindex4.i ~ 1))

summary(pool(sindex4.i))

mi.res[13,1] <- summary(pool(sindex4.i))[2]
mi.res[13,2] <- summary(pool(sindex4.i))[3]
mi.res[13,3] <- "NZI"
mi.res[13,4] <- "Increase"

sindex4.d <- with(nzlu.panel.mi2,
                  lm(sindex4.d ~ 1))

summary(pool(sindex4.d))

mi.res[14,1] <- summary(pool(sindex4.d))[2]
mi.res[14,2] <- summary(pool(sindex4.d))[3]
mi.res[14,3] <- "NZI"
mi.res[14,4] <- "Decrease"

sindex4.s <- with(nzlu.panel.mi2,
                  lm(sindex4.s ~ 1))

summary(pool(sindex4.s))

mi.res[15,1] <- summary(pool(sindex4.s))[2]
mi.res[15,2] <- summary(pool(sindex4.s))[3]
mi.res[15,3] <- "NZI"
mi.res[15,4] <- "No change"

sindex5.i <- with(nzlu.panel.mi2,
                  lm(sindex5.i ~ 1))

summary(pool(sindex5.i))

mi.res[16,1] <- summary(pool(sindex5.i))[2]
mi.res[16,2] <- summary(pool(sindex5.i))[3]
mi.res[16,3] <- "RZI"
mi.res[16,4] <- "Increase"

sindex5.d <- with(nzlu.panel.mi2,
                  lm(sindex5.d ~ 1))

summary(pool(sindex5.d))

mi.res[17,1] <- summary(pool(sindex5.d))[2]
mi.res[17,2] <- summary(pool(sindex5.d))[3]
mi.res[17,3] <- "RZI"
mi.res[17,4] <- "Decrease"

sindex5.s <- with(nzlu.panel.mi2,
                  lm(sindex5.s ~ 1))

summary(pool(sindex5.s))

mi.res[18,1] <- summary(pool(sindex5.s))[2]
mi.res[18,2] <- summary(pool(sindex5.s))[3]
mi.res[18,3] <- "RZI"
mi.res[18,4] <- "No change"

sindex6.i <- with(nzlu.panel.mi2,
                  lm(sindex6.i ~ 1))

summary(pool(sindex6.i))

mi.res[19,1] <- summary(pool(sindex6.i))[2]
mi.res[19,2] <- summary(pool(sindex6.i))[3]
mi.res[19,3] <- "MPDI"
mi.res[19,4] <- "Increase"

sindex6.d <- with(nzlu.panel.mi2,
                  lm(sindex6.d ~ 1))

summary(pool(sindex6.d))


mi.res[20,1] <- summary(pool(sindex6.d))[2]
mi.res[20,2] <- summary(pool(sindex6.d))[3]
mi.res[20,3] <- "MPDI"
mi.res[20,4] <- "Decrease"

sindex6.s <- with(nzlu.panel.mi2,
                  lm(sindex6.s ~ 1))

summary(pool(sindex6.s))

mi.res[21,1] <- summary(pool(sindex6.s))[2]
mi.res[21,2] <- summary(pool(sindex6.s))[3]
mi.res[21,3] <- "MPDI"
mi.res[21,4] <- "No change"

sindex7.i <- with(nzlu.panel.mi2,
                  lm(sindex7.i ~ 1))

summary(pool(sindex7.i))

mi.res[22,1] <- summary(pool(sindex7.i))[2]
mi.res[22,2] <- summary(pool(sindex7.i))[3]
mi.res[22,3] <- "IZPI"
mi.res[22,4] <- "Increase"

sindex7.d <- with(nzlu.panel.mi2,
                  lm(sindex7.d ~ 1))

summary(pool(sindex7.d))

mi.res[23,1] <- summary(pool(sindex7.d))[2]
mi.res[23,2] <- summary(pool(sindex7.d))[3]
mi.res[23,3] <- "IZPI"
mi.res[23,4] <- "Decrease"

sindex7.s <- with(nzlu.panel.mi2,
                  lm(sindex7.s ~ 1))

summary(pool(sindex7.s))

mi.res[24,1] <- summary(pool(sindex7.s))[2]
mi.res[24,2] <- summary(pool(sindex7.s))[3]
mi.res[24,3] <- "IZPI"
mi.res[24,4] <- "No change"

## compare ## 

nzlu.panel.comp <- af.muni.final %>%
  mutate(zri.i = case_when(zri_2022 > zri_2006 + 0.5 ~ 1,
                           TRUE ~ 0),
         zri.d = case_when(zri_2022 < zri_2006 - 0.5 ~ 1,
                           TRUE ~ 0),
         zri.s = case_when(abs(zri_2022 - zri_2006) <= 0.5 ~ 1,
                           TRUE ~ 0),
         sindex1.change = sindex1_2022 - sindex1_2006,
         sindex2.change = sindex2_2022 - sindex2_2006,
         sindex3.change = sindex3_2022 - sindex3_2006,
         sindex4.change = sindex4_2022 - sindex4_2006,
         sindex5.change = sindex5_2022 - sindex5_2006,
         sindex6.change = sindex6_2022 - sindex6_2006,
         sindex7.change = sindex7_2022 - sindex7_2006,
         sindex1.i = case_when(sindex1_2022 > sindex1_2006 ~ 1,
                               TRUE ~ 0),
         sindex2.i = case_when(sindex2_2022 > sindex2_2006 ~ 1,
                               TRUE ~ 0),
         sindex3.i = case_when(sindex3_2022 > sindex3_2006 ~ 1,
                               TRUE ~ 0),
         sindex4.i = case_when(sindex4_2022 > sindex4_2006 ~ 1,
                               TRUE ~ 0),
         sindex5.i = case_when(sindex5_2022 > sindex5_2006 ~ 1,
                               TRUE ~ 0),
         sindex6.i = case_when(sindex6_2022 > sindex6_2006 ~ 1,
                               TRUE ~ 0),
         sindex7.i = case_when(sindex7_2022 > sindex7_2006 ~ 1,
                               TRUE ~ 0),
         sindex1.d = case_when(sindex1_2022 < sindex1_2006 ~ 1,
                               TRUE ~ 0),
         sindex2.d = case_when(sindex2_2022 < sindex2_2006 ~ 1,
                               TRUE ~ 0),
         sindex3.d = case_when(sindex3_2022 < sindex3_2006 ~ 1,
                               TRUE ~ 0),
         sindex4.d = case_when(sindex4_2022 < sindex4_2006 ~ 1,
                               TRUE ~ 0),
         sindex5.d = case_when(sindex5_2022 < sindex5_2006 ~ 1,
                               TRUE ~ 0),
         sindex6.d = case_when(sindex6_2022 < sindex6_2006 ~ 1,
                               TRUE ~ 0),
         sindex7.d = case_when(sindex7_2022 < sindex7_2006 ~ 1,
                               TRUE ~ 0),
         sindex1.s = case_when(sindex1_2022 == sindex1_2006 ~ 1,
                               TRUE ~ 0),
         sindex2.s = case_when(sindex2_2022 == sindex2_2006 ~ 1,
                               TRUE ~ 0),
         sindex3.s = case_when(sindex3_2022 == sindex3_2006 ~ 1,
                               TRUE ~ 0),
         sindex4.s = case_when(sindex4_2022 == sindex4_2006 ~ 1,
                               TRUE ~ 0),
         sindex5.s = case_when(sindex5_2022 == sindex5_2006 ~ 1,
                               TRUE ~ 0),
         sindex6.s = case_when(sindex6_2022 == sindex6_2006 ~ 1,
                               TRUE ~ 0),
         sindex7.s = case_when(sindex7_2022 == sindex7_2006 ~ 1,
                               TRUE ~ 0))

nzlu.panel.comp.rd <- nzlu.panel.comp %>%
  summarize(zri.i = mean(zri.i, na.rm=T),
            zri.d = mean(zri.d, na.rm=T),
            zri.s = mean(zri.s, na.rm=T),
            sindex1.i = mean(sindex1.i, na.rm=T),
            sindex1.d = mean(sindex1.d, na.rm=T),
            sindex1.s = mean(sindex1.s, na.rm=T),
            sindex2.i = mean(sindex2.i, na.rm=T),
            sindex2.d = mean(sindex2.d, na.rm=T),
            sindex2.s = mean(sindex2.s, na.rm=T),
            sindex3.i = mean(sindex3.i, na.rm=T),
            sindex3.d = mean(sindex3.d, na.rm=T),
            sindex3.s = mean(sindex3.s, na.rm=T),
            sindex4.i = mean(sindex4.i, na.rm=T),
            sindex4.d = mean(sindex4.d, na.rm=T),
            sindex4.s = mean(sindex4.s, na.rm=T),
            sindex5.i = mean(sindex5.i, na.rm=T),
            sindex5.d = mean(sindex5.d, na.rm=T),
            sindex5.s = mean(sindex5.s, na.rm=T),
            sindex6.i = mean(sindex6.i, na.rm=T),
            sindex6.d = mean(sindex6.d, na.rm=T),
            sindex6.s = mean(sindex6.s, na.rm=T),
            sindex7.i = mean(sindex7.i, na.rm=T),
            sindex7.d = mean(sindex7.d, na.rm=T),
            sindex7.s = mean(sindex7.s, na.rm=T))

nzlu.panel.comp.rd <- t(nzlu.panel.comp.rd)

mi.res.fin <- mi.res %>%
  mutate(lb = pe - 1.96 * se,
         ub = pe + 1.96 * se)

mi.res.fin$gr.f <- factor(mi.res.fin$gr,
                          levels = c("ZRI",
                                     "EGCI",
                                     "OSRI",
                                     "MLSI",
                                     "RZI",
                                     "NZI",
                                     "MPDI",
                                     "IZPI"))

levels(mi.res.fin$gr.f)

## final figure ##
## thanks to @ Aaron for help removing leading zeros from y axis: https://stackoverflow.com/questions/42578262/remove-leading-zeros-in-ggplot2-scales

ggplot(mi.res.fin, aes(x=gr.f, y=pe, fill = direct)) + 
  geom_bar(stat="identity", position = "dodge") + 
  scale_fill_brewer(palette = "Set2") + 
  scale_y_continuous(breaks = c("0" = 0, ".25" = 0.25, ".50" = 0.5, ".75" = 0.75)) +
  geom_errorbar(aes(ymin=lb, ymax=ub), width=.2, position=position_dodge(.9)) + 
  labs(y = "Proportion",
       x = "") + 
  ggtitle("") + 
  guides(fill=guide_legend(title="")) + 
  theme_bw() +
  theme(
    axis.title = element_text(face = "bold"),
    axis.text.x = element_text(size = 12, colour = "black", face = "bold"),
    axis.text.y = element_text(size = 12, colour = "black", face = "bold"))


######################
## now, non-imputed ##
######################

test <- af.muni.fin %>%
  mutate(zri_increase = ifelse(zri_st_2022 > zri_st_2006 + 0.5, 1, 0),
         zri_decrease = ifelse(zri_st_2022 < zri_st_2006 - 0.5, 1, 0),
         zri_nochange = case_when(abs(zri_st_2022 - zri_st_2006) <= 0.5 ~ 1,
                                  abs(zri_st_2022 - zri_st_2006) > 0.5 ~ 0))

mean(test$zri_increase,na.rm=T)
mean(test$zri_decrease,na.rm=T)
mean(test$zri_nochange,na.rm=T)

mean(test$zri_increase,na.rm=T) + mean(test$zri_decrease,na.rm=T) + mean(test$zri_nochange,na.rm=T)



test <- af.muni.fin %>%
  mutate(zri.i = case_when(zri_st_2022 > zri_st_2006 + 0.5 ~ 1,
                           zri_st_2022 <= zri_st_2006 + 0.5 ~ 0),
         zri.d = case_when(zri_st_2022 < zri_st_2006 - 0.5 ~ 1,
                           zri_st_2022 >= zri_st_2006 - 0.5 ~ 0),
         zri.s = case_when(abs(zri_st_2022 - zri_st_2006) <= 0.5 ~ 1,
                           abs(zri_st_2022 - zri_st_2006) > 0.5 ~ 0),
         zri.id = abs(zri.i - zri.d),
         sindex1.change = sindex1_2022 - sindex1_2006,
         sindex2.change = sindex2_2022 - sindex2_2006,
         sindex3.change = sindex3_2022 - sindex3_2006,
         sindex4.change = sindex4_2022 - sindex4_2006,
         sindex5.change = sindex5_2022 - sindex5_2006,
         sindex6.change = sindex6_2022 - sindex6_2006,
         sindex7.change = sindex7_2022 - sindex7_2006,
         sindex1.i = case_when(sindex1_2022 > sindex1_2006 ~ 1,
                               sindex1_2022 <= sindex1_2006 ~ 0),
         sindex2.i = case_when(sindex2_2022 > sindex2_2006 ~ 1,
                               sindex2_2022 <= sindex2_2006 ~ 0),
         sindex3.i = case_when(sindex3_2022 > sindex3_2006 ~ 1,
                               sindex3_2022 <= sindex3_2006 ~ 0),
         sindex4.i = case_when(sindex4_2022 > sindex4_2006 ~ 1,
                               sindex4_2022 <= sindex4_2006 ~ 0),
         sindex5.i = case_when(sindex5_2022 > sindex5_2006 ~ 1,
                               sindex5_2022 <= sindex5_2006 ~ 0),
         sindex6.i = case_when(sindex6_2022 > sindex6_2006 ~ 1,
                               sindex6_2022 <= sindex6_2006 ~ 0),
         sindex7.i = case_when(sindex7_2022 > sindex7_2006 ~ 1,
                               sindex7_2022 <= sindex7_2006 ~ 0),
         sindex1.d = case_when(sindex1_2022 < sindex1_2006 ~ 1,
                               sindex1_2022 >= sindex1_2006 ~ 0),
         sindex2.d = case_when(sindex2_2022 < sindex2_2006 ~ 1,
                               sindex2_2022 >= sindex2_2006 ~ 0),
         sindex3.d = case_when(sindex3_2022 < sindex3_2006 ~ 1,
                               sindex3_2022 >= sindex3_2006 ~ 0),
         sindex4.d = case_when(sindex4_2022 < sindex4_2006 ~ 1,
                               sindex4_2022 >= sindex4_2006 ~ 0),
         sindex5.d = case_when(sindex5_2022 < sindex5_2006 ~ 1,
                               sindex5_2022 >= sindex5_2006 ~ 0),
         sindex6.d = case_when(sindex6_2022 < sindex6_2006 ~ 1,
                               sindex6_2022 >= sindex6_2006 ~ 0),
         sindex7.d = case_when(sindex7_2022 < sindex7_2006 ~ 1,
                               sindex7_2022 >= sindex7_2006 ~ 0),
         sindex1.s = case_when(sindex1_2022 == sindex1_2006 ~ 1,
                               sindex1_2022 != sindex1_2006 ~ 0),
         sindex2.s = case_when(sindex2_2022 == sindex2_2006 ~ 1,
                               sindex2_2022 != sindex2_2006 ~ 0),
         sindex3.s = case_when(sindex3_2022 == sindex3_2006 ~ 1,
                               sindex3_2022 != sindex3_2006 ~ 0),
         sindex4.s = case_when(sindex4_2022 == sindex4_2006 ~ 1,
                               sindex4_2022 != sindex4_2006 ~ 0),
         sindex5.s = case_when(sindex5_2022 == sindex5_2006 ~ 1,
                               sindex5_2022 != sindex5_2006 ~ 0),
         sindex6.s = case_when(sindex6_2022 == sindex6_2006 ~ 1,
                               sindex6_2022 != sindex6_2006 ~ 0),
         sindex7.s = case_when(sindex7_2022 == sindex7_2006 ~ 1,
                               sindex7_2022 != sindex7_2006 ~ 0))

test.res <- data.frame(pe = rep(NA,24),
                       gr = c(rep("ZRI",3),
                              rep("EGCI",3),
                              rep("OSRI",3),
                              rep("MLSI",3),
                              rep("NZI",3),
                              rep("RZI",3),
                              rep("MPDI",3),
                              rep("IZPI",3)),
                       direct = rep(c("Increase","Decrease","No change"),8))

test.res$pe[1] <- mean(test$zri.i, na.rm=T)
test.res$pe[2] <-mean(test$zri.d, na.rm=T)
test.res$pe[3] <-mean(test$zri.s, na.rm=T)
mean(test$zri.id, na.rm=T)

mean(test$sindex1.change, na.rm=T)
mean(test$sindex2.change, na.rm=T)
mean(test$sindex3.change, na.rm=T)
mean(test$sindex4.change, na.rm=T)
mean(test$sindex5.change, na.rm=T)
mean(test$sindex6.change, na.rm=T)
mean(test$sindex7.change, na.rm=T)

test.res$pe[4] <-mean(test$sindex1.i, na.rm=T)
test.res$pe[5] <-mean(test$sindex1.d, na.rm=T)
test.res$pe[6] <-mean(test$sindex1.s, na.rm=T)

test.res$pe[7] <-mean(test$sindex2.i, na.rm=T)
test.res$pe[8] <-mean(test$sindex2.d, na.rm=T)
test.res$pe[9] <-mean(test$sindex2.s, na.rm=T)

test.res$pe[10] <-mean(test$sindex3.i, na.rm=T)
test.res$pe[11] <-mean(test$sindex3.d, na.rm=T)
test.res$pe[12] <-mean(test$sindex3.s, na.rm=T)

test.res$pe[13] <-mean(test$sindex4.i, na.rm=T)
test.res$pe[14] <-mean(test$sindex4.d, na.rm=T)
test.res$pe[15] <-mean(test$sindex4.s, na.rm=T)

test.res$pe[16] <-mean(test$sindex5.i, na.rm=T)
test.res$pe[17] <-mean(test$sindex5.d, na.rm=T)
test.res$pe[18] <-mean(test$sindex5.s, na.rm=T)

test.res$pe[19] <-mean(test$sindex6.i, na.rm=T)
test.res$pe[20] <-mean(test$sindex6.d, na.rm=T)
test.res$pe[21] <-mean(test$sindex6.s, na.rm=T)

test.res$pe[22] <-mean(test$sindex7.i, na.rm=T)
test.res$pe[23] <-mean(test$sindex7.d, na.rm=T)
test.res$pe[24] <-mean(test$sindex7.s, na.rm=T)

test.res$gr.f <- factor(test.res$gr,
                        levels = c("ZRI",
                                     "EGCI",
                                     "OSRI",
                                     "MLSI",
                                     "RZI",
                                     "NZI",
                                     "MPDI",
                                     "IZPI"))

levels(test.res$gr.f)


## plot ## 

ggplot(test.res, aes(x=gr.f, y=pe, fill = direct)) + 
  geom_bar(stat="identity", position = "dodge") + 
  scale_fill_brewer(palette = "Set2") + 
  labs(y = "Fraction",
       x = "") + 
  ggtitle("") + 
  guides(fill=guide_legend(title="")) + 
  theme_bw()


## END OF PROGRAM ##
#sink()

